<--- %%NOBANNER%% --> vcompare.sas
 BackForward

/*-------------------<-- Start of Description-->---------------------\
| The function will need 2 or more than 2 datasets to do the         |
| comparison of the variable names, it will print the variable       |
| names in common between the datasets to the output and the log     |
| window.                                                            |
|---------------------<-- End of Description-->----------------------|
|--------------------------------------------------------------------|
|------------<-- Start of Files or Arguments Needed-->---------------|
| Argument:                                                          |
|    indata: input data set;                                         |
|    compare: common - variables in common between any two data;     |
|             diff   - variables different between any two data;     |
|             both   - variables in common or different;             |
|             all    - variables in common or different;             |
|             default- both;                                         |
|---------------<-- End of Files Arguments Needed-->-----------------|
|--------------------------------------------------------------------|
|------------------<-- Start of Files Created-->---------------------|
| Example: %vcompare(indata=one two three, compare=common); /        |
|          %vcompare(one two three);                                 |
|          %vcompare(one two,compare=diff common);                   |
| Usage: %vcompare(indata=, compare=);                               |
\-------------------<-- End of Files Created-->---------------------*/
%macro vcompare/parmbuff;
/*--------------------------------------------\
| Author:   Duo Zhou;                         |
| Created:  8-30-2001 7:17pm;                 |
| Modified: 11-12-2001 9:14pm;                |
| Purpose:  Compare the variable names between|
|           2 or more datasets;               |
\--------------------------------------------*/
proc format;
   value typef (multilabel)
   1="Num"
   2="Char";
run;
%local ndsn indata tempdata _i_ _j_ _k_ compbuff compare _tmplast_;
%let _tmplast_=&syslast;
%let dsnbuff=%qscan(&syspbuff,1,%str((),));
%let compbuff=%qscan(&syspbuff,2,%str((),));
%let linesize = %SYSFUNC(GETOPTION(linesize));
%if (%index(%quote(&syspbuff),%quote(=))) %then %do;
   %if (%index(%quote(&dsnbuff),%quote(=))) %then %do;
      %if (%index(%quote(%upcase(%sysfunc(compress(%quote(&dsnbuff))))),%quote(DATA=))) %then %do;
         %let indata=%qscan(&dsnbuff,2,%str(=));
         %if (%index(%quote(&compbuff),%quote(=))) %then %do;
            %if (not %index(%quote(%upcase(%sysfunc(compress(%quote(&compbuff))))),%quote(COMPARE=))) %then %do;
               %put ==> Alert! Keyword parameter "%qscan(&compbuff,1,%str(=))" is not defined!;
            %end;
            %else %do;
               %let compare=%qscan(&compbuff,2,%str(=));
            %end;
         %end;
         %else %do;
            %let compare=&compbuff;
         %end;
      %end;
      %else %if (%index(%quote(%upcase(%sysfunc(compress(%quote(&dsnbuff))))),%quote(COMPARE=))) %then %do;
         %let compare=%qscan(&dsnbuff,2,%str(=));
         %if (%index(%quote(&compbuff),%quote(=))) %then %do;
            %if (not %index(%quote(%upcase(%sysfunc(compress(%quote(&compbuff))))),%quote(DATA=))) %then %do;
               %put ==> Alert! Keyword parameter "%qscan(&compbuff,1,%str(=))" is not defined!;
            %end;
            %else %do;
               %let indata=%qscan(&compbuff,2,%str(=));
            %end;
         %end;
         %else %do;
            %let indata=&compbuff;
         %end;
      %end;
      %else %put ==> Alert! Keyword parameter "%qscan(&dsnbuff,1,%str(=))" is not defined!;
   %end;
   %else %if (%index(%quote(&compbuff),%quote(=))) %then %do;
      %if (%index(%quote(%upcase(%sysfunc(compress(%quote(&compbuff))))),%quote(DATA=))) %then %do;
         %let indata=%qscan(&compbuff,2,%str(=));
         %let compare=&dsnbuff;
      %end;
      %else %if (%index(%quote(%upcase(%sysfunc(compress(%quote(&compbuff))))),%quote(COMPARE=))) %then %do;
         %let indata=&dsnbuff;
         %let compare=%qscan(&compbuff,2,%str(=));
      %end;
      %else %do;
         %put ==> Alert! Keyword parameter "%qscan(&compbuff,1,%str(=))" is not defined!;
      %end;
   %end;
%end;
%else %do;
   %let indata=&dsnbuff;
   %let compare=&compbuff;
%end;
%if (not %index(%quote(%upcase(&compare)),COMMON)) and (not %index(%quote(%upcase(&compare)),DIFF)) %then %do;
   %let compare=both;
   %put --> Note: You forgot to provide me a "compare" method: the variables in common or different between;
   %put -->       the datasets! I will take the assumption that you want to compare in both ways (common ;
   %put -->       and different).;
%end;
%if (%quote(&indata) eq) %then %do;
   %put ==> Alert! No dataset is to be compared!
   %goto finish;
%end;
%let ndsn=1;
%let dsn&ndsn=%qscan(&indata,&ndsn,%str( ,));
%do %while(&&dsn&ndsn ne);
   %let ndsn=%eval(&ndsn+1);
   %let dsn&ndsn=%qscan(&indata,&ndsn,%str( ,));   
%end;
%let ndsn=%eval(&ndsn-1);

%if &ndsn<=1 %then %do;
   %put ==> Alert! You just provided one data set, I can do nothing with it.; 
%end;
%else %do;
   %if &ndsn>2 %then %do;
      %put --> Note: You have just given me &ndsn data set. To do the pairwise; 
      %put -->       comparison, it could take extensive time.;
   %end;
   %if ((%index(%upcase(&compare),COMMON)) and (%index(%upcase(&compare),DIFF))) or (%index(%upcase(&compare),BOTH)) %then %do;
      %put --> Note: I will list all the variables in common and the variables different between any two; 
      %put -->       of the &ndsn data sets.;
   %end;
   %else %if ((%index(%upcase(&compare),COMMON)) and (not %index(%upcase(&compare),DIFF))) %then %do;
      %put --> Note: I will list all the variables in common between any two of the &ndsn data sets.; 
   %end;
   %else %if ((not %index(%upcase(&compare),COMMON)) and (%index(%upcase(&compare),DIFF))) %then %do;
      %put --> Note: I will list all the variables different between any two of the &ndsn data sets.; 
   %end;
   %do _i_=1 %to &ndsn;
      proc contents data=&&dsn&_i_ noprint out=_tmp&_i_(keep=name label type length format formatl formatd) memtype=(data view); run;
      %if (not %sysfunc(exist(_tmp&_i_))) %then %do;
         %put ==> Alert! Cannot find "&&dsn&_i_"!;
         %goto finish;
      %end;
      proc sort data=_tmp&_i_; by name; run;
   %end;
   %do _j_=1 %to %eval(&ndsn-1);
      %do _k_=%eval(&_j_+1) %to &ndsn;
            %let Names_InCommon_d1_d2=;
            %let Names_Ind1_NotInd2=;
            %let Names_NotInd1_Ind2=;
            %if (%index(%upcase(&compare),COMMON)) or (%index(%upcase(&compare),BOTH)) or (%index(%upcase(&compare),ALL))%then %do;
            proc sql noprint;%let Names_InCommon_d1_d2=; %let Names_InCommonTypeDiff_d1_d2=;
               /*create table Names_InCommon_d&_j_._d&_k_ as*/
               select t1.name into:Names_InCommon_d1_d2 separated by ", " 
               from _tmp&_j_ as t1, _tmp&_k_ as t2
               where lowcase(t1.name)=lowcase(t2.name)
               order by t1.name;

               select t1.name into:Names_InCommonTypeDiff_d1_d2 separated by ", " 
               from _tmp&_j_ as t1, _tmp&_k_ as t2
               where (lowcase(t1.name)=lowcase(t2.name)) and (t1.type ne t2.type)
               order by t1.name;
            quit;      
   
            proc sql; 
               create table Names_InCommon_Data&_j_._Data&_k_ as
               select t1.name label="Variable", t1.label as label1 label="VarLabel in &&dsn&_j_", t2.label as label2 label="VarLabel in &&dsn&_k_",
                      t1.type as type1 format=typef. label="VarType in &&dsn&_j_", t2.type as type2 format=typef. label="Type in &&dsn&_k_",
                      t1.length as length1 label="VarLength in &&dsn&_j_", t2.length as length2 label="VarLength in &&dsn&_k_",
                      case  
                            when t1.format = ' ' and t1.formatl<1 and t1.formatd< 1 then trimn(left(trimn(left(t1.format))||trimn(left(put(t1.formatl, 10.)))))
                            when (t1.format ne ' ' or t1.formatl>=1) and t1.formatd< 1 then trimn(left(trimn(left(t1.format))||trimn(left(put(t1.formatl, 10.))))||'.')
                           when t1.formatd>=1 then trimn(left(trimn(left(t1.format))||trimn(left(put(t1.formatl, 10.)))||'.'||trimn(left(put(t1.formatd, 10.)))))
                           end as format1 label="VarFormat in &&dsn&_j_",
                      case  
                            when t2.format = ' ' and t2.formatl<1 and t2.formatd< 1 then trimn(left(trimn(left(t2.format))||trimn(left(put(t2.formatl, 10.)))))
                            when (t2.format ne ' ' or t2.formatl>=1) and t2.formatd< 1 then trimn(left(trimn(left(t2.format))||trimn(left(put(t2.formatl, 10.))))||'.')
                           when t2.formatd>=1 then trimn(left(trimn(left(t2.format))||trimn(left(put(t2.formatl, 10.)))||'.'||trimn(left(put(t2.formatd, 10.)))))
                           end as format2 label="VarFormat in &&dsn&_k_"
               from _tmp&_j_ as t1, _tmp&_k_ as t2
               where lowcase(t1.name)=lowcase(t2.name)
               order by t1.name;
            quit;

            proc print data=Names_InCommon_Data&_j_._Data&_k_ label; title "%center(%initcaps(Variables In Common Between Data &&dsn&_j_ and Data &&dsn&_k_))";run;
            data _null_;
               file print;
               nobs1=%nobs(&&dsn&_j_);
               nvars1=%nvars(&&dsn&_j_);
               nobs2=%nobs(&&dsn&_k_);
               nvars2=%nvars(&&dsn&_k_);
               nvarcom=%nobs(Names_InCommon_Data&_j_._Data&_k_);
               put /;
               put @20 @1 "%justify(%initcaps(Summary of Data: &&dsn&_j_ and &&dsn&_k_), justify=center)" /;
               put &linesize*"-"/;
               put @5 "Number of observations in data &&dsn&_j_" @(49+max(length("&&dsn&_j_"),length("&&dsn&_k_"))) nobs1 /;
               put @5 "Number of variables in data &&dsn&_j_" @(49+max(length("&&dsn&_j_"),length("&&dsn&_k_"))) nvars1 /;
               put @5 "Number of observations in data &&dsn&_k_" @(49+max(length("&&dsn&_j_"),length("&&dsn&_k_"))) nobs2 /;
               put @5 "Number of variables in data &&dsn&_k_" @(49+max(length("&&dsn&_j_"),length("&&dsn&_k_"))) nvars2 /;
               put @5 "Number of variables in common" @(49+max(length("&&dsn&_j_"),length("&&dsn&_k_"))) nvarcom /;
            run;
            %put %_repeat('-',&linesize);
            %if (%quote(&Names_InCommon_d1_d2) ne) %then %do;
               %put --> Note: Variables in common between %upcase(&&dsn&_j_) and %upcase(&&dsn&_k_): ;
               %put -->       %upcase(&Names_InCommon_d1_d2)%str(;);
	            %if (%quote(&Names_InCommonTypeDiff_d1_d2) ne) %then %do;
	               %put -->       Variables in common, types are different between %upcase(&&dsn&_j_) and %upcase(&&dsn&_k_): ;
	               %put -->       %upcase(&Names_InCommonTypeDiff_d1_d2)%str(;);
	            %end;
               %else %do;
                  %put -->       and they have the same type too%str(;);
               %end;
               %put -->       please see output for LABEL, FORMAT, LENGTH and TYPE details.; 
            %end;
            %else %do;
               %put --> Note: There are no Variables In Common Between %upcase(&&dsn&_j_) and %upcase(&&dsn&_k_).;
            %end;
            %put %_repeat('-',&linesize);
            /*** cleanup the temp datasets ***/
            proc datasets library=work nolist;
              delete Names_InCommon_data&_j_._data&_k_;
            run;quit;
         %end;
         %if (%index(%upcase(&compare),DIFF)) or (%index(%upcase(&compare),BOTH)) or (%index(%upcase(&compare),ALL)) %then %do;
            proc sql noprint; %let Names_Ind1_NotInd2=;%let Names_NotInd1_Ind2=;
               create table Names_InData&_j_._NotInData&_k_ as
               select t1.name label="Variable in &&dsn&_j_",
               t1.label as label label="VarLabel in &&dsn&_j_",
               t1.type as type format=typef. label="VarType in &&dsn&_j_",
               t1.length as length label="VarLength in &&dsn&_j_",
               case  
                   when t1.format = ' ' and t1.formatl<1 and t1.formatd< 1 then trimn(left(trimn(left(t1.format))||trimn(left(put(t1.formatl, 10.)))))
                   when (t1.format ne ' ' or t1.formatl>=1) and t1.formatd< 1 then trimn(left(trimn(left(t1.format))||trimn(left(put(t1.formatl, 10.))))||'.')
                  when t1.formatd>=1 then trimn(left(trimn(left(t1.format))||trimn(left(put(t1.formatl, 10.)))||'.'||trimn(left(put(t1.formatd, 10.)))))
                  end as format label="VarFormat in &&dsn&_j_"
               from _tmp&_j_ as t1
               where lowcase(t1.name) not in 
                  (select lowcase(name) as name
                   from _tmp&_k_)
               order by t1.name;
               
               select t1.name into:Names_Ind1_NotInd2 separated by ", "
               from _tmp&_j_ as t1
               where lowcase(t1.name) not in 
                  (select lowcase(name) as name
                   from _tmp&_k_);

               create table Names_NotInData&_j_._InData&_k_ as
               select t2.name label="Variable in &&dsn&_k_",
               t2.label as label label="VarLabel in &&dsn&_k_",
               t2.type as type format=typef. label="VarType in &&dsn&_k_",
               t2.length as length label="VarLength in &&dsn&_k_",
               case  
                   when t2.format = ' ' and t2.formatl<1 and t2.formatd< 1 then trimn(left(trimn(left(t2.format))||trimn(left(put(t2.formatl, 10.)))))
                   when (t2.format ne ' ' or t2.formatl>=1) and t2.formatd< 1 then trimn(left(trimn(left(t2.format))||trimn(left(put(t2.formatl, 10.))))||'.')
                  when t2.formatd>=1 then trimn(left(trimn(left(t2.format))||trimn(left(put(t2.formatl, 10.)))||'.'||trimn(left(put(t2.formatd, 10.)))))
                  end as format label="VarFormat in &&dsn&_k_"
               from _tmp&_k_ as t2
               where lowcase(t2.name) not in 
                  (select lowcase(name) as name
                   from _tmp&_j_)
               order by t2.name;

               select t2.name into:Names_NotInd1_Ind2 separated by ", "
               from _tmp&_k_ as t2
               where lowcase(t2.name) not in
                  (select lowcase(name) as name
                   from _tmp&_j_);
            quit;
            proc print data=Names_InData&_j_._NotInData&_k_ label; title "%center(%initcaps(Variables In Data &&dsn&_j_ But Not In Data &&dsn&_k_))";run;
            proc print data=Names_NotInData&_j_._InData&_k_ label; title "%center(%initcaps(Variables Not In Data &&dsn&_j_ But In Data &&dsn&_k_))";run;
            data _null_;
               file print;
               nobs1=%nobs(&&dsn&_j_);
               nvars1=%nvars(&&dsn&_j_);
               nobs2=%nobs(&&dsn&_k_);
               nvars2=%nvars(&&dsn&_k_);
               nvar_in1_un2=%nobs(Names_InData&_j_._NotInData&_k_);
               nvar_un1_in2=%nobs(Names_NotInData&_j_._InData&_k_);
               put /;
               put @1 "%justify(%initcaps(Summary of Data: &&dsn&_j_ and &&dsn&_k_), justify=center)" /;
               put &linesize*"-"/;
               put @5 "Number of observations in data &&dsn&_j_" @(63+max(length("&&dsn&_j_"),length("&&dsn&_k_"))) nobs1 /;
               put @5 "Number of variables in data &&dsn&_j_" @(63+max(length("&&dsn&_j_"),length("&&dsn&_k_"))) nvars1 /;
               put @5 "Number of observations in data &&dsn&_k_" @(63+max(length("&&dsn&_j_"),length("&&dsn&_k_"))) nobs2 /;
               put @5 "Number of variables in data &&dsn&_k_" @(63+max(length("&&dsn&_j_"),length("&&dsn&_k_"))) nvars2 /;
               put @5 "Number of variables in data &&dsn&_j_ but Not In data &&dsn&_k_" @(63+max(length("&&dsn&_j_"),length("&&dsn&_k_"))) nvar_in1_un2 /;
               put @5 "Number of variables Not in data &&dsn&_j_ but In data &&dsn&_k_" @(63+max(length("&&dsn&_j_"),length("&&dsn&_k_"))) nvar_un1_in2 /;
            run;
            %put %_repeat('-',&linesize);
            %if (%quote(&Names_Ind1_NotInd2) ne) %then %do;
               %put --> Note: Variables in %upcase(&&dsn&_j_) and not in %upcase(&&dsn&_k_): ;
               %put -->       %upcase(&Names_Ind1_NotInd2)%str(;);
               %put -->       please see output for label details.;
            %end;
            %else %do;
               %put --> Note: All variables in %upcase(&&dsn&_j_) are in %upcase(&&dsn&_k_).;
            %end;
            %put %_repeat('-',&linesize);
            %if (%quote(&Names_NotInd1_Ind2) ne) %then %do;
               %put --> Note: Variables not in %upcase(&&dsn&_j_) but in %upcase(&&dsn&_k_): ;
               %put -->       %upcase(&Names_NotInd1_Ind2)%str(;);
               %put -->       please see output for label details.;
            %end;
            %else %do;
               %put --> Note: All variables in %upcase(&&dsn&_k_) are in %upcase(&&dsn&_j_). ;
            %end;
            %put %_repeat('-',&linesize);
            /*** cleanup the temp datasets ***/
            proc datasets library=work nolist;
              delete Names_Indata&_j_._NotIndata&_k_ Names_NotIndata&_j_._Indata&_k_;
            run;quit;
         %end;
      %end;
      proc datasets library=work nolist;
           delete _tmp&_j_;
      run;quit;
   %end;
   proc datasets library=work nolist;
        delete _tmp&ndsn;
   run;quit;
%end;
%finish:
%let syslast=&_tmplast_;
%mend vcompare;